{
 "cells": [
  {
   "cell_type": "markdown",
   "source": [
    "# GS Quant Meets Markets x MSCI"
   ],
   "metadata": {
    "collapsed": false
   }
  },
  {
   "cell_type": "markdown",
   "source": [
    "#### Step 1: Import Modules"
   ],
   "metadata": {
    "collapsed": false
   }
  },
  {
   "cell_type": "code",
   "source": [
    "# Import modules\n",
    "from typing import List\n",
    "\n",
    "from gs_quant.api.utils import ThreadPoolManager\n",
    "from gs_quant.data import Dataset\n",
    "from gs_quant.api.gs.assets import GsAssetApi\n",
    "from gs_quant.models.risk_model import FactorRiskModel, DataAssetsRequest\n",
    "from functools import partial\n",
    "\n",
    "from gs_quant.markets.baskets import Basket\n",
    "from gs_quant.markets.indices_utils import ReturnType\n",
    "from gs_quant.markets.position_set import PositionSet\n",
    "from gs_quant.session import Environment, GsSession\n",
    "import matplotlib.pyplot as plt\n",
    "import datetime as dt\n",
    "from dateutil.relativedelta import relativedelta\n",
    "import pandas as pd"
   ],
   "metadata": {
    "collapsed": false,
    "pycharm": {
     "name": "#%%\n"
    }
   },
   "execution_count": null,
   "outputs": []
  },
  {
   "cell_type": "markdown",
   "source": [
    "#### Step 2: Authenticate"
   ],
   "metadata": {
    "collapsed": false
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "# Initialize session -- for external users, input client id and secret below.\n",
    "# Do not commit real credentials: keep these as None in the saved notebook and supply\n",
    "# the values at run time (for example from environment variables or getpass).\n",
    "client = None\n",
    "secret = None\n",
    "# Later cells reach this session through GsSession.current (see backtest_strategy).\n",
    "GsSession.use(Environment.PROD, client_id=client, client_secret=secret, scopes='')"
   ],
   "metadata": {
    "collapsed": false,
    "pycharm": {
     "name": "#%%\n"
    }
   }
  },
  {
   "cell_type": "markdown",
   "source": [
    "#### Step 3: Implement basic functions to fetch coverage universe, ratings, factor & liquidity data"
   ],
   "metadata": {
    "collapsed": false
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "# Initialize functions\n",
    "\n",
    "def batch_liquidity(dataset_id: str, asset_ids: list, day: dt.date, size: int=200) -> pd.DataFrame:\n",
    "    \"\"\"Query a dataset for many assets on a single day, `size` asset ids per request.\n",
    "\n",
    "    :param dataset_id: id of the dataset to query\n",
    "    :param asset_ids: asset ids forwarded to the query as `assetId`\n",
    "    :param day: passed as the first two positional arguments of `get_data`\n",
    "        (presumably the start and end of the query window -- confirm against the Dataset API)\n",
    "    :param size: maximum number of asset ids per request\n",
    "    :return: the per-request results concatenated into one frame\n",
    "    \"\"\"\n",
    "    dataset = Dataset(dataset_id)\n",
    "    chunks = (asset_ids[offset:offset + size] for offset in range(0, len(asset_ids), size))\n",
    "    requests = [partial(dataset.get_data, day, day, assetId=chunk) for chunk in chunks]\n",
    "    # all requests are handed to the thread pool manager in one call\n",
    "    return pd.concat(ThreadPoolManager.run_async(requests))\n",
    "\n",
    "\n",
    "def batch_ratings(dataset_id: str, gsid_ids: list, day: dt.date, fallback_month, filter_value: str= None, size: int=5000) -> pd.DataFrame:\n",
    "    \"\"\"Fetch ratings rows for many gsids, starting `fallback_month` months before `day`.\n",
    "\n",
    "    :param dataset_id: id of the ratings dataset to query\n",
    "    :param gsid_ids: gsids to request, sent in chunks of `size`\n",
    "    :param day: reference date the lookback window is counted back from\n",
    "    :param fallback_month: number of months to look back from `day`\n",
    "    :param filter_value: when truthy, forwarded to the query as `rating` so only matching rows are requested\n",
    "    :param size: maximum number of gsids per request\n",
    "    :return: the per-request results concatenated into one frame\n",
    "    \"\"\"\n",
    "    data = Dataset(dataset_id)\n",
    "    # `months=` (plural) is the relative offset; `month=` (singular) is absolute and would\n",
    "    # replace the month of `day` with `fallback_month` instead of stepping back in time\n",
    "    start_date = day - relativedelta(months=fallback_month)\n",
    "    # the rating filter is only added to the query when a filter value was supplied\n",
    "    rating_filter = {'rating': filter_value} if filter_value else {}\n",
    "    tasks = [partial(data.get_data, start_date=start_date, gsid=gsid_ids[i:i + size], **rating_filter)\n",
    "             for i in range(0, len(gsid_ids), size)]\n",
    "    results = ThreadPoolManager.run_async(tasks)\n",
    "    return pd.concat(results)\n",
    "\n",
    "\n",
    "def batch_asset_request(day: dt.date, gsids_list: list, limit: int=1000) -> list:\n",
    "    \"\"\"Look up asset records for a list of gsids as of midnight on `day`, `limit` gsids per request.\n",
    "\n",
    "    :param day: date the lookup is made as of (combined with 00:00:00)\n",
    "    :param gsids_list: gsids to resolve\n",
    "    :param limit: maximum number of gsids per request\n",
    "    :return: one flat list holding the items of every response\n",
    "    \"\"\"\n",
    "    as_of = dt.datetime.combine(day, dt.time.min)\n",
    "    requested_fields = ['gsid', 'bbid', 'id', 'delisted', 'assetClassificationsIsPrimary']\n",
    "    requests = []\n",
    "    for offset in range(0, len(gsids_list), limit):\n",
    "        # the response limit is ten times the chunk size\n",
    "        # (presumably because one gsid can match several asset records -- confirm)\n",
    "        requests.append(partial(GsAssetApi.get_many_assets_data,\n",
    "                                gsid=gsids_list[offset:offset + limit],\n",
    "                                as_of=as_of,\n",
    "                                limit=limit * 10,\n",
    "                                fields=requested_fields))\n",
    "    responses = ThreadPoolManager.run_async(requests)\n",
    "    flattened = []\n",
    "    for response in responses:\n",
    "        flattened.extend(response)\n",
    "    return flattened\n",
    "\n",
    "\n",
    "def get_universe_with_xrefs(day: dt.date, model: FactorRiskModel) -> pd.DataFrame:\n",
    "    \"\"\"Build the xref table (gsid, bbid) for a risk model's coverage universe on `day`.\n",
    "\n",
    "    Rows flagged as non-primary or delisted are dropped, then only the first row per gsid is kept.\n",
    "\n",
    "    :param day: date the coverage universe and the asset lookup are taken on\n",
    "    :param model: risk model whose asset universe is used\n",
    "    :return: frame indexed by `id` with `gsid` and `bbid` columns\n",
    "    \"\"\"\n",
    "    print(f'---------Getting risk {model.id} coverage universe on {day}------------')\n",
    "    # the first column of the universe frame holds the identifiers looked up as gsids below\n",
    "    universe_gsids = model.get_asset_universe(day, day).iloc[:, 0].tolist()\n",
    "    print(f'{len(universe_gsids)} assets in {model.id} on {day} that map to gsids')\n",
    "    # resolve each gsid to its asset record (id, bbid, ...) as of the same date\n",
    "    xrefs = pd.DataFrame(batch_asset_request(day, universe_gsids))\n",
    "    print(f'{len(xrefs)} assets found')\n",
    "\n",
    "    # NOTE(review): both flags are compared against strings ('false' / 'yes') -- confirm the API returns strings\n",
    "    is_primary = xrefs['assetClassificationsIsPrimary'] != 'false'\n",
    "    xrefs = xrefs[is_primary]\n",
    "    print(f'{len(xrefs)} asset xrefs after is not primary dropped')\n",
    "    is_listed = xrefs['delisted'] != 'yes'\n",
    "    xrefs = xrefs[is_listed]\n",
    "    print(f'{len(xrefs)} asset xrefs after delisted assets are dropped')\n",
    "\n",
    "    # keep the first record for every gsid\n",
    "    xrefs = xrefs[['gsid', 'bbid', 'id']].drop_duplicates(subset='gsid', keep='first')\n",
    "    print(f'{len(xrefs)} positions after duplicate gsids removed')\n",
    "\n",
    "    return xrefs.set_index('id')\n",
    "\n",
    "\n",
    "def get_and_filter_ratings(day: dt.date, gsid_list: List[str], filter_value: str = None) -> list:\n",
    "    \"\"\"Return the unique gsids that have at least one ratings row in the lookback window.\n",
    "\n",
    "    :param day: reference date the lookback window is counted back from\n",
    "    :param gsid_list: gsids to request ratings for\n",
    "    :param filter_value: rating to keep (e.g. 'Buy'); when omitted, rows of any rating are requested\n",
    "    :return: list of unique gsids, one per asset that has a (matching) ratings row\n",
    "    \"\"\"\n",
    "    print(f'---------Filtering coverage universe by rating: {filter_value}------------')\n",
    "    fallback_month = 3\n",
    "    ratings_df = batch_ratings('RATINGS_CL', gsid_list, day, fallback_month, filter_value)\n",
    "    # NOTE(review): filter_value is applied inside the query, so rows with other ratings are never\n",
    "    # fetched; the row kept below is the latest *matching* row, not necessarily the asset's latest rating.\n",
    "    # Last row per gsid in a single pass (assumes rows arrive in chronological order -- TODO confirm);\n",
    "    # unlike concatenating per-asset slices, this also works when no ratings rows come back.\n",
    "    most_recent_rating = ratings_df.groupby('gsid', sort=False).tail(1)\n",
    "    print(f'{len(most_recent_rating)} unique assets with ratings after filtering applied')\n",
    "    return list(most_recent_rating['gsid'].unique())\n",
    "\n",
    "\n",
    "def get_and_filter_factor_exposures(day: dt.date, identifier_list: List[str], factor_model: FactorRiskModel, factors: List[str] = None, filter_floor: float = 0.5) -> pd.DataFrame:\n",
    "    \"\"\"Fetch factor exposures for a list of gsids and keep assets at or above a floor on every given factor.\n",
    "\n",
    "    :param day: date the factor data and exposures are requested for\n",
    "    :param identifier_list: gsids to request exposures for\n",
    "    :param factor_model: risk model the factor data and exposures are read from\n",
    "    :param factors: factor names to filter on; None or empty means no filtering\n",
    "    :param filter_floor: minimum exposure (inclusive) required on each factor in `factors`\n",
    "    :return: exposures frame with one column per factor name, restricted to the surviving assets\n",
    "    \"\"\"\n",
    "    # None stands for 'no factor filters'; a literal [] default would be one list shared by every call\n",
    "    factors = [] if factors is None else factors\n",
    "    print(f'---------Filtering coverage universe by factors: {factors}------------')\n",
    "    available_factors = factor_model.get_factor_data(day).set_index('identifier')\n",
    "    req = DataAssetsRequest('gsid', identifier_list)\n",
    "    # missing exposures are treated as 0\n",
    "    factor_exposures = factor_model.get_universe_factor_exposure(day, day, assets=req).fillna(0)\n",
    "\n",
    "    # exposure columns are looked up in the factor table by identifier and relabelled with factor names\n",
    "    factor_exposures.columns = [available_factors.loc[x]['name'] for x in factor_exposures.columns]\n",
    "    # drop the second index level (presumably the date, since a single day was requested -- confirm)\n",
    "    factor_exposures = factor_exposures.droplevel(1)\n",
    "    print(f'{len(factor_exposures)} factor exposures available')\n",
    "    for factor in factors:\n",
    "        factor_exposures = factor_exposures[factor_exposures[factor] >= filter_floor]\n",
    "        print(f'{len(factor_exposures)} factor exposures returned after filtering by {factor} with floor exposure {filter_floor}')\n",
    "    return factor_exposures\n",
    "\n",
    "\n",
    "def get_and_filter_liquidity(day: dt.date, asset_ids: List[str], filter_floor: int = 0) -> pd.DataFrame:\n",
    "    \"\"\"Fetch GSEOD data for the given assets on `day` and optionally keep rows at or above an mdv22Day floor.\n",
    "\n",
    "    :param day: date the data is requested for\n",
    "    :param asset_ids: asset ids to request\n",
    "    :param filter_floor: minimum `mdv22Day` (inclusive); a falsy value skips the filter\n",
    "    :return: frame indexed by `assetId`\n",
    "    \"\"\"\n",
    "    print(f'---------Filtering coverage universe by liquidity value: {filter_floor}------------')\n",
    "    liquidity = batch_liquidity('GSEOD', asset_ids, day).set_index(\"assetId\")\n",
    "    print(f'{len(liquidity)} liquidity data available for requested universe')\n",
    "    if not filter_floor:\n",
    "        # no floor requested: hand back everything that was fetched\n",
    "        return liquidity\n",
    "    above_floor = liquidity[liquidity['mdv22Day'] >= filter_floor]\n",
    "    print(f'{len(above_floor)} unique assets with liquidity data returned after filtering')\n",
    "    return above_floor\n",
    "\n",
    "\n",
    "def backtest_strategy(day: dt.date, position_set: List[dict], risk_model_id: str):\n",
    "    \"\"\"Post the positions to the '/risk/liquidity' endpoint and return its time series data.\n",
    "\n",
    "    :param day: date sent with the request, formatted as YYYY-MM-DD\n",
    "    :param position_set: positions sent as the request's `positions`\n",
    "    :param risk_model_id: risk model id sent as the request's `riskModel`\n",
    "    :return: the `timeseriesData` entry of the response (None when the key is absent)\n",
    "    \"\"\"\n",
    "    print('---------Backtesting strategy------------')\n",
    "    payload = {\n",
    "        'currency': 'USD',\n",
    "        'notional': 1000000,\n",
    "        'date': day.strftime('%Y-%m-%d'),\n",
    "        'positions': position_set,\n",
    "        'participationRate': 0.1,\n",
    "        'riskModel': risk_model_id,\n",
    "        'timeSeriesBenchmarkIds': [],\n",
    "        'measures': ['Time Series Data'],\n",
    "    }\n",
    "    response = GsSession.current._post('/risk/liquidity', payload)\n",
    "    return response.get('timeseriesData')\n",
    "\n",
    "\n",
    "def graph_df_list(df_list, title):\n",
    "    \"\"\"Plot every entry of `df_list` on one figure and show it.\n",
    "\n",
    "    :param df_list: sequence of entries whose item 0 is the data to plot and item 1 its legend label\n",
    "    :param title: figure title\n",
    "    \"\"\"\n",
    "    for entry in df_list:\n",
    "        series, label = entry[0], entry[1]\n",
    "        plt.plot(series, label=label)\n",
    "    plt.title(title)\n",
    "    plt.xlabel('Date')\n",
    "    plt.legend(title='Measures')\n",
    "    plt.show()"
   ],
   "metadata": {
    "collapsed": false,
    "pycharm": {
     "name": "#%%\n"
    }
   }
  },
  {
   "cell_type": "markdown",
   "source": [
    "#### Step 4: Strategy Implementation\n",
    "\n",
    "Proposed Methodology\n",
    "- Starting universe: Chosen risk model coverage universe\n",
    "- High Conviction names: Retain GS \"Buy\" ratings only\n",
    "- High ESG names: Retain high ESG scores only, using the BARRA GEMLTL ESG model\n",
    "- High Profitability names: Retain high Profitability scores only, using the BARRA GEMLTL ESG model\n",
    "- Liquidity adjustment: Remove the tail of illiquid names\n",
    "- Weighting: MDV-based weighting"
   ],
   "metadata": {
    "collapsed": false,
    "pycharm": {
     "name": "#%% md\n"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "# Get risk model and available style factors\n",
    "# `start` is the timer reported once the position set has been built\n",
    "start = dt.datetime.now()\n",
    "\n",
    "# Load the risk model\n",
    "model_id = 'BARRA_GEMLTL_ESG'\n",
    "factor_model = FactorRiskModel.get(model_id)\n",
    "\n",
    "# Work with the day before the model's most recent calendar date\n",
    "date = factor_model.get_most_recent_date_from_calendar() - dt.timedelta(days=1)\n",
    "print(f'-----Available style factors for model {model_id}-----')\n",
    "all_factors = factor_model.get_factor_data(date, date)\n",
    "# keep the 'RI' category only (presumably the style factors, per the heading printed above)\n",
    "factor_data = all_factors[all_factors['factorCategoryId'] == 'RI']\n",
    "print(factor_data['name'])"
   ],
   "metadata": {
    "collapsed": false,
    "pycharm": {
     "name": "#%%\n"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "# Get universe: the risk model's coverage on `date`, as a frame indexed by asset `id`\n",
    "# with `gsid` and `bbid` columns (see get_universe_with_xrefs)\n",
    "mqid_to_id = get_universe_with_xrefs(date, factor_model)"
   ],
   "metadata": {
    "collapsed": false,
    "pycharm": {
     "name": "#%%\n"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "# Request ratings rows matching `ratings_filter` for the universe's gsids, over the lookback\n",
    "# window configured inside get_and_filter_ratings; the result is a list of unique gsids\n",
    "ratings_filter = 'Buy'\n",
    "ratings_universe = get_and_filter_ratings(date, mqid_to_id['gsid'].tolist(), filter_value=ratings_filter)"
   ],
   "metadata": {
    "collapsed": false,
    "pycharm": {
     "name": "#%%\n"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "# Pass in factors to filter by\n",
    "factors = ['ESG', 'Profitability']\n",
    "# Minimum exposure (inclusive) an asset must have on every factor listed above\n",
    "filter_floor = 0.5\n",
    "\n",
    "exposures = get_and_filter_factor_exposures(date, ratings_universe, factor_model, factors=factors, filter_floor=filter_floor)\n",
    "# Re-key the xref table by gsid so `bbid` and the asset `id` can be joined onto the exposures\n",
    "# (assumes the exposures frame is indexed by gsid -- TODO confirm)\n",
    "ids = mqid_to_id.reset_index().set_index(\"gsid\")\n",
    "exposures = exposures.join(ids, how='inner')"
   ],
   "metadata": {
    "collapsed": false,
    "pycharm": {
     "name": "#%%\n"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "# Filter by liquidity, which takes in the MQ Id\n",
    "asset_ids = exposures['id'].tolist()\n",
    "# Rows whose mdv22Day is below this floor are dropped by get_and_filter_liquidity\n",
    "liquidity_floor = 1000000\n",
    "liquidity = get_and_filter_liquidity(date, asset_ids, filter_floor=liquidity_floor)\n",
    "# The liquidity frame is indexed by assetId and the xref table by id, so this join\n",
    "# attaches `gsid` and `bbid` to every liquidity row that has an xref\n",
    "liquidity = liquidity.join(mqid_to_id, how='inner')"
   ],
   "metadata": {
    "collapsed": false,
    "pycharm": {
     "name": "#%%\n"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "# Get weights as mdv22Day / total mdv22Day, so the weights sum to 1\n",
    "# Series.sum() replaces the Python-level sum over a list copy of the column\n",
    "total_adv = liquidity['mdv22Day'].sum()\n",
    "liquidity['weights'] = liquidity['mdv22Day'] / total_adv"
   ],
   "metadata": {
    "collapsed": false,
    "pycharm": {
     "name": "#%%\n"
    }
   }
  },
  {
   "cell_type": "markdown",
   "source": [
    "#### Step 5: Backtest Strategy"
   ],
   "metadata": {
    "collapsed": false,
    "pycharm": {
     "name": "#%% md\n"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "# Backtest composition\n",
    "# Two views of the same weights: keyed by the frame index (assetId) for the backtest request,\n",
    "# and keyed by bbid for basket creation\n",
    "backtest_set = [{'assetId': index, \"weight\": row['weights']} for index, row in liquidity.iterrows()]\n",
    "position_set = [{'bbid': row['bbid'], \"weight\": row['weights']} for index, row in liquidity.iterrows()]\n",
    "print(\"Position set for basket create: \")\n",
    "print(pd.DataFrame(position_set))\n",
    "# `start` was set in the cell that loads the risk model\n",
    "print(f'Total time to build position set with requested parameters {dt.datetime.now() - start}')\n",
    "\n",
    "\n",
    "backtest = backtest_strategy(date, backtest_set, model_id)\n",
    "print(\"Available measures to plot for backtested strategy: \")\n",
    "# Every key of the first returned entry except \"name\" is treated as a plottable measure\n",
    "measures = list(backtest[0].keys())\n",
    "measures.remove(\"name\")\n",
    "print(measures)"
   ],
   "metadata": {
    "collapsed": false,
    "pycharm": {
     "name": "#%%\n"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "# Graph Normalized Performance\n",
    "# (the list is deliberately not called `np`, which is the conventional alias for numpy)\n",
    "performance_measures = ['normalizedPerformance']\n",
    "series_to_plot = []\n",
    "for measure in performance_measures:\n",
    "    # each data point is indexed as [date string, value]; key the values by parsed date\n",
    "    points = backtest[0].get(measure)\n",
    "    by_date = {dt.datetime.strptime(point[0], '%Y-%m-%d'): point[1] for point in points}\n",
    "    series_to_plot.append((pd.Series(by_date), measure))\n",
    "\n",
    "graph_df_list(series_to_plot, 'Normalized Performance')"
   ],
   "metadata": {
    "collapsed": false,
    "pycharm": {
     "name": "#%%\n"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "# Plot many measures\n",
    "# Build a filtered copy instead of calling measures.remove(...): removing in place changes the list\n",
    "# created by the backtest cell, so running this cell a second time would raise ValueError\n",
    "excluded_measures = {'netExposure', 'cumulativePnl', 'maxDrawdown'}\n",
    "measures_to_plot = [measure for measure in measures if measure not in excluded_measures]\n",
    "\n",
    "series_to_plot = []\n",
    "for measure in measures_to_plot:\n",
    "    # each data point is indexed as [date string, value]; key the values by parsed date\n",
    "    points = backtest[0].get(measure)\n",
    "    by_date = {dt.datetime.strptime(point[0], '%Y-%m-%d'): point[1] for point in points}\n",
    "    series_to_plot.append((pd.Series(by_date), measure))\n",
    "\n",
    "graph_df_list(series_to_plot, 'Backtested Strategy Measures')"
   ],
   "metadata": {
    "collapsed": false,
    "pycharm": {
     "name": "#%%\n"
    }
   }
  },
  {
   "cell_type": "markdown",
   "source": [
    "#### Step 6: Basket Creation"
   ],
   "metadata": {
    "collapsed": false,
    "pycharm": {
     "name": "#%% md\n"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "# Create basket with positions\n",
    "my_basket = Basket()\n",
    "my_basket.name = 'Basket Name'\n",
    "my_basket.ticker = 'Basket Ticker'\n",
    "my_basket.currency = 'USD'\n",
    "my_basket.return_type = ReturnType.PRICE_RETURN\n",
    "my_basket.publish_to_bloomberg = True\n",
    "my_basket.publish_to_reuters = True\n",
    "my_basket.publish_to_factset = False\n",
    "\n",
    "# `position_set` (the list of bbid/weight dicts built in the backtest cell) is read but not rebound,\n",
    "# so this cell can be re-run; the PositionSet object gets its own name\n",
    "positions_df = pd.DataFrame([[row['bbid'], row['weight']] for row in position_set],\n",
    "                            columns=['identifier', 'weight'])\n",
    "basket_position_set = PositionSet.from_frame(positions_df)\n",
    "my_basket.position_set = basket_position_set\n",
    "\n",
    "my_basket.get_details() # we highly recommend verifying the basket state looks correct before calling create!"
   ],
   "metadata": {
    "collapsed": false,
    "pycharm": {
     "name": "#%%\n"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "# Publish basket\n",
    "my_basket.create()\n",
    "my_basket.poll_status(timeout=10000, step=20) # optional: constantly checks create status until report succeeds, fails, or the poll times out (this example checks every 20 seconds for up to 10000 seconds, assuming both arguments are in seconds)\n",
    "my_basket.get_url() # will return a url to your Marquee basket page ex. https://marquee.gs.com/s/products/MA9B9TEMQ2RW16K9/summary\n",
    "\n",
    "\n",
    "\n",
    "\n",
    "\n"
   ],
   "metadata": {
    "collapsed": false,
    "pycharm": {
     "name": "#%%\n"
    }
   }
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 2
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython2",
   "version": "2.7.6"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 0
}